* ------------------------------------------------------------------
* Session setup: reset Stata's state, define the project directories
* as globals, and open the log for this run.
* ------------------------------------------------------------------
clear
clear matrix
* NOTE(review): -set memory- only matters for old Stata releases;
* kept so the script still runs there.
set memory 1000m
set more off
cap log close

cd "/Users/..."

* Project directories. The path literals must contain plain characters
* only: the previous definitions of do_file and raw_data carried
* invisible Unicode formatting characters (picked up when the paths were
* pasted), which made those globals point to non-existent locations.
global do_file "/Users/.../do_file"
global log_file "/Users/.../log_file"
global raw_data "/Users/.../raw_data"
global working_data "/Users/.../working_data"
global results "/Users/.../results/072020/bootstrap"


* Use the global rather than a path relative to the working directory,
* so the log location does not depend on the -cd- above.
log using "$log_file/bootstrap_072020.log", replace


***************************
***************************
* Use Full Sample
***************************
***************************

* Load the cleaned day-level file through the $working_data global
* (the rest of the script uses the globals; a bare relative path only
* works when the working directory happens to be the project root).
use "$working_data/clean_day_level_geo_weather_081519.dta", clear


* Number each user's records 1, 2, ... in order of trip_start_date.
* Putting trip_start_date in parentheses makes the within-user order an
* explicit part of the -bysort- instead of relying on a preceding -sort-.
bysort user_id (trip_start_date): gen day_index=_n

* Declare the panel so the time-series operators (l., l(2).) used later
* refer to the user's previous record(s).
tsset user_id day_index




************************************
* Construct measures for near-misses
************************************


* Near-miss indicators (0/1).
*
* Stata treats a missing value as larger than any number, so a bare
* "x>0" is TRUE when x is missing. The indicators are therefore built
* only from non-missing counts; when the inputs do not determine the
* answer the indicator is left missing instead of being coded as 1.

* hb1: at least one hard brake recorded in total_prev_hard_brake
gen hb1=(total_prev_hard_brake>0) if !missing(total_prev_hard_brake)

* hb2: hard brake AND hard turn both present
* NOTE(review): this combines total_prev_hard_brake with total_hard_turn
* (no "prev") -- confirm that mixing the two is intended.
gen hb2=(total_prev_hard_brake>0 & total_hard_turn>0) ///
	if !missing(total_prev_hard_brake, total_hard_turn)
* a known zero in either component settles hb2=0 even if the other is missing
replace hb2=0 if total_prev_hard_brake==0 | total_hard_turn==0

sum hb1 hb2




***********************
* Gen first-diff for NM
***********************


* First difference of each near-miss indicator: current value minus the
* value on the user's previous record (l. follows the tsset panel).
foreach nmvar of varlist hb1 hb2 {
	gen diff_`nmvar'=`nmvar'-l.`nmvar'
}


**********************************************
* Gen first-diff for risky behavior and others
**********************************************


* First differences (value minus value on the user's previous record)
* for driving behavior, trip characteristics, scores, road type and
* weather. The two lists below are parallel: the j-th entry of
* diff_names is the suffix of the new variable diff_<name>, and the
* j-th entry of level_vars is the variable being differenced.
* Variables are created in the listed order.
local diff_names agg_acc phone_use ///
	traffic_jam traffic_time distance duration speed speed2 ///
	drive_at_night night_prop night_duration weekend rush_hour ///
	control_score cautious_score focused_score drive_score gap_time ///
	highway ///
	sunny rain snow rain_storm cloudy ///
	high_temper low_temper

local level_vars total_agg_acc total_phone_use ///
	total_traffic_jam total_traffic_sec distance duration speed speed2 ///
	drive_at_night night_duration_prop total_night_duration weekend rush_hour ///
	control_score cautious_score focused_score drive_score gap_time ///
	highway ///
	sunny rain snow rain_storm cloudy ///
	high_temper low_temper

local n_diff : word count `diff_names'

forvalues j=1/`n_diff' {
	local newname : word `j' of `diff_names'
	local source : word `j' of `level_vars'
	gen diff_`newname'=`source'-l.`source'
}


* Indicator equal to 1 when any of rain, snow or rain_storm equals 1,
* and 0 otherwise (including when all three are missing).
gen rain_snow=(rain==1 | snow==1 | rain_storm==1)

	


* Write a one-observation file holding the full-sample means of the
* covariates; the data in memory are restored afterwards.
preserve
	local mean_vars total_phone_use distance highway ///
		speed drive_at_night total_traffic_jam weekend rush_hour ///
		high_temper low_temper sunny rain_snow drive_score gap_time

	collapse (mean) `mean_vars'

	save $results/data_full_sample.dta, replace
restore
	
	


*********************************************
* Start doing IV estimation using full sample
*********************************************


* IV-GMM on the full sample, in first differences.
*
* For each near-miss measure (diff_hb1, diff_hb2) and each outcome
* (phone use, distance, highway) estimate
*     diff_y on exogenous differenced controls
*     endogenous: l.diff_y  l.<near-miss>  l.diff_drive_score  diff_gap_time
*     instruments: second lags of the levels
* with standard errors clustered by user and no constant.
*
* Column order in the output table is: (hb1: phone_use, distance,
* highway), then (hb2: phone_use, distance, highway). The later parsing
* step reads columns by position, so this order must not change.
*
* The first -outreg2- call uses -replace- and the rest use -append-.
* With -append- everywhere, rerunning the script stacked new columns
* onto the previous run's reg_full output and the fixed-position parsing
* then read stale results.

* exogenous (included) regressors, common to all equations
local exog_diff diff_traffic_jam diff_weekend diff_rush_hour ///
	diff_high_temper diff_low_temper diff_sunny  diff_rain diff_snow

* excluded instruments common to all equations (second lags of levels)
local iv_common l(2).drive_score l(2).gap_time l(2).weekend l(2).rush_hour ///
	l(2).total_traffic_sec l(2).high_temper l(2).low_temper l(2).sunny l(2).rain l(2).snow

local out_mode replace

foreach nm of varlist diff_hb1 diff_hb2{

	foreach y in phone_use distance highway {

		* level variable whose second lag instruments the lagged outcome
		local y_level `y'
		if "`y'"=="phone_use" local y_level total_phone_use

		ivregress gmm diff_`y' `exog_diff' ///
		(l.diff_`y' l.`nm' l.diff_drive_score diff_gap_time ///
		= l(2).`y_level' `iv_common'), vce(cluster user_id) nocons
		outreg2 using $results/reg_full.xls, `out_mode' keep(l.`nm')

		* every call after the first adds a column to the fresh table
		local out_mode append
	}

}


* Keep the constructed variables so each subsample replication can start
* from this file.
save $working_data/iv_reg_full_sample_072020.dta, replace







********************************
* Now start doing for subsamples
********************************


* 100 subsample replications.
*
* Each replication reloads the full-sample file, gives every user one
* uniform draw (on the day_index==1 record), drops all records of users
* whose draw is >= 0.8, saves the subsample covariate means to
* data_<i>.dta and re-estimates the six IV-GMM equations into reg_<i>.
*
* Changes relative to the earlier version:
*  - the random-number seed is set once before the loop, so the set of
*    subsamples (and hence every output file) is reproducible;
*  - the first -outreg2- call of each replication uses -replace-, so a
*    rerun does not append new columns to a previous run's reg_<i> file.

set seed 72020

* exogenous (included) regressors, common to all equations
local exog_diff diff_traffic_jam diff_weekend diff_rush_hour ///
	diff_high_temper diff_low_temper diff_sunny  diff_rain diff_snow

* excluded instruments common to all equations (second lags of levels)
local iv_common l(2).drive_score l(2).gap_time l(2).weekend l(2).rush_hour ///
	l(2).total_traffic_sec l(2).high_temper l(2).low_temper l(2).sunny l(2).rain l(2).snow

forvalues i=1(1)100{

	use $working_data/iv_reg_full_sample_072020.dta, clear

	* one draw per user, flagged when it falls in [0.8, 1)
	gen random_draw=runiform() if day_index==1
	gen temp_random=1 if random_draw>=0.8 & random_draw~=.
	* spread the flag to all of the user's records
	bysort user_id: egen sum_random=total(temp_random)

	* keep only users that were not flagged
	keep if sum_random==0

	* covariate means of this subsample
	preserve
		collapse (mean) total_phone_use distance highway ///
		speed drive_at_night total_traffic_jam weekend rush_hour high_temper low_temper sunny rain_snow drive_score gap_time

		save $results/data_`i'.dta, replace
	restore

	* Same equations and column order as the full-sample run:
	* (hb1: phone_use, distance, highway), (hb2: phone_use, distance, highway)
	local out_mode replace

	foreach nm of varlist diff_hb1 diff_hb2{

		foreach y in phone_use distance highway {

			* level variable whose second lag instruments the lagged outcome
			local y_level `y'
			if "`y'"=="phone_use" local y_level total_phone_use

			ivregress gmm diff_`y' `exog_diff' ///
			(l.diff_`y' l.`nm' l.diff_drive_score diff_gap_time ///
			= l(2).`y_level' `iv_common'), vce(cluster user_id) nocons
			outreg2 using $results/reg_`i'.xls, `out_mode' keep(l.`nm')

			local out_mode append
		}

	}

}



**************************
* Load estimation results
**************************


** Stack the covariate means: the full-sample row first, followed by
** one row per replication (1..100), then write everything to CSV.
use $results/data_full_sample.dta, clear

forvalues rep=1/100 {
	append using $results/data_`rep'.dta
}

export delimited using $results/covariates.csv, replace


** Clean the coefficient tables.
** The same steps are applied to reg_full.txt and to reg_1.txt ... reg_100.txt
** (presumably the text companions of the outreg2 .xls tables -- confirm),
** each producing reg_coef_<tag>.dta with a single row of six numeric columns.

* list of file tags, in processing order: full, 1, 2, ..., 100
local run_tags full
forvalues rep=1/100 {
	local run_tags `run_tags' `rep'
}

foreach tag of local run_tags {

	import delimited $results/reg_`tag'.txt, clear
	drop v1

	* keep rows 4 and 6 of the imported table
	keep if _n==4 | _n==6

	* v5-v7 are empty on the first kept row: pull the values up from the second
	forvalues col=5/7 {
		replace v`col'=v`col'[_n+1] if v`col'==""
	}
	drop if _n==2

	* cut each cell at its first "*" (when the "*" is not the first
	* character), then convert the column to numeric
	forvalues col=2/7 {
		replace v`col'=substr(v`col', 1, strpos(v`col',"*")-1) if strpos(v`col',"*")>1
		destring v`col', force replace
	}

	save $results/reg_coef_`tag'.dta, replace

}







** Stack the cleaned coefficient rows (full sample first, then
** replications 1..100) and export them. Each intermediate
** reg_coef_*.dta file is deleted as soon as it has been read.
use $results/reg_coef_full.dta, clear
erase $results/reg_coef_full.dta

forvalues rep=1/100 {
	append using $results/reg_coef_`rep'.dta
	erase $results/reg_coef_`rep'.dta
}

export delimited using $results/coefs.csv, replace






